SETUP

suppressPackageStartupMessages(library(package = "knitr"))
suppressPackageStartupMessages(library(package = "Biobase"))
suppressPackageStartupMessages(library(package = "RColorBrewer"))
suppressPackageStartupMessages(library(package = "tidyverse"))
suppressPackageStartupMessages(library(package = "ggbeeswarm"))
suppressPackageStartupMessages(library(package = "pvca")) # dleelab/pvca
suppressPackageStartupMessages(library(package = "lme4"))
suppressPackageStartupMessages(library(package = "cluster"))
suppressPackageStartupMessages(library(package = "ComplexHeatmap")) # Bioc Install
suppressPackageStartupMessages(library(package = "ImmuneSignatures2"))

These figures only look at the “Young” cohort that is defined as 18 to 50 years old.

noRespGEFile <- list.files(path       = dataCacheDir,
                           pattern    = "young_norm_noResponse_eset.rds",
                           full.names = TRUE)
esetNoResp <- readRDS(file = noRespGEFile)

withRespGEFile <- list.files(path       = dataCacheDir,
                             pattern    = "young_norm_withResponse_eset.rds",
                             full.names = TRUE)
esetWithResp <- readRDS(file = withRespGEFile)

FIGURES

Figure 1b: Plot number of samples as a function of day of sampling

Authors: Slim Fourati, Joanna Arce

# any sampling data after 20 days coded as >20
df.fig1b <- pData(esetNoResp) %>%
  select(uid, study_time_collected, pathogen, vaccine_type) %>%
  arrange(study_time_collected) %>%
  mutate(study_time_collected = signif(study_time_collected, digits = 3),
     study_time_collected.factor = ifelse(test = study_time_collected > 20,
                          yes  = ">20",
                          no   = study_time_collected),
     study_time_collected.factor =
       factor(study_time_collected.factor,
          levels = unique(study_time_collected.factor)),
     vaccine = paste0(pathogen, " (", vaccine_type, ")")) %>%
  group_by(study_time_collected.factor, vaccine) %>%
  summarize(n = n())

ggplot(data = df.fig1b,
       mapping = aes(x = study_time_collected.factor,
                         y = n)) +
  geom_bar(stat = "identity", mapping = aes(fill = vaccine)) +
  labs(x = "Days post-vaccination", y = "Number of samples") +
  scale_y_continuous(limits = c(0, 800),
                         breaks = seq(from = 0, to = 800, by = 100)) + 
  scale_fill_manual(name   = "Pathogen (Vaccine type)",
                        values = vaccine2color) +
  theme_bw() +
  theme(panel.grid         = element_blank(),
        panel.grid.major.y = element_line(color = "lightgrey"),
        axis.text          = element_text(color = "black"),
        axis.text.x        = element_text(angle = 45, hjust = 1),
        legend.pos         = "bottom",
        legend.text        = element_text(size = 7),
        legend.key.height  = unit(0.02, units = "npc"),
        legend.key.width   = unit(0.015, units = "npc")) +
  guides(fill = guide_legend(ncol = 2))

Figure 1c: Plot age as a function of vaccine investigated

Author: Slim Fourati

df.fig1c <- pData(esetNoResp) %>%
  select(participant_id, pathogen, vaccine_type, age_imputed, gender_imputed) %>%
  distinct() %>%
  mutate(vaccine = paste0(pathogen, " (", vaccine_type, ")"))

# remove vaccine where all the participant have the same age
flag <- df.fig1c %>%
  group_by(vaccine) %>%
  summarize(nbUniqueAge = length(unique(age_imputed))) %>%
  filter(nbUniqueAge > 1)

df.fig1c <- filter(df.fig1c, vaccine %in% flag$vaccine)

ggplot(data = df.fig1c,
       mapping = aes(x = vaccine,
                         y = age_imputed)) +
  geom_boxplot(fill = "transparent", outlier.color = "transparent") +
  geom_beeswarm(mapping = aes(color = vaccine, shape = gender_imputed),
                    cex = 0.3) +
  scale_color_manual(name   = "Pathogen (Vaccine type)",
                     values = vaccine2color) +
  labs(x = "Pathogen (Vaccine type)", y = "Age") + 
  theme_bw() +
  theme(panel.grid         = element_blank(),
        panel.grid.major.y = element_line(color = "lightgrey"),
        axis.text          = element_text(color = "black"),
        axis.text.x        = element_text(angle = 45, hjust = 1, size = 8),
        legend.text        = element_text(size = 7),
        legend.key.height  = unit(0.02, units = "npc"),
        legend.key.width   = unit(0.015, units = "npc")) +
  guides(fill = guide_legend(ncol = 1))

# test for difference between vaccine
kruskal.test(formula = age_imputed ~ vaccine, data = df.fig1c)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  age_imputed by vaccine
## Kruskal-Wallis chi-squared = 110.1, df = 7, p-value < 2.2e-16

Figure 1d: Proportion of Variance Explained

Author: Slim Fourati

em.fig1d <- exprs(esetNoResp)
pd.fig1d <- pData(esetNoResp) %>%
  mutate(Vaccine = paste0(pathogen, " (", vaccine_type, ")")) %>%
  select(uid, gender_imputed, ethnicity, Vaccine, study_time_collected,
     age_imputed) %>%
  rename(Gender    = gender_imputed,
     Ethnicity = ethnicity,
     Timepoint = study_time_collected,
     Age       = age_imputed) %>%
  column_to_rownames(var = "uid")
pd.fig1d <- pd.fig1d[colnames(em.fig1d), ]
df.fig1d <- data.frame(explained = as.vector(fit),
                       effect    = names(fit)) %>%
  arrange(explained) %>%
  mutate(effect = factor(effect, levels = effect))

ggplot(data    = df.fig1d,
       mapping = aes(x = effect, y = explained)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = signif(explained, digits = 3)),
            nudge_y   = 0.01,
            size      = 3) +
  labs(x = NULL, y = "Proportion of the variance explained") + 
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90,
                                   vjust = 0.5,
                                   hjust = 1))

Perform Sample Level Enrichment Analysis on hallmark+BTM genesets

Author: Slim Fourati

Pre-Processing: Microarray probes may have different affinities for their target mRNA and thus the itensities can’t be compared between probes inside the same sample. A way to correct for that is to scale each probe to a mean of 0 and a standard-deviation of 1.

The following looks only at pre-vaccination timepoints

  sleaFile <- file.path(dataCacheDir, "sleaSet.rds")
  sleaCached <- file.exists(sleaFile)
  if(sleaCached){
    sleaSet <- readRDS(sleaFile)
  }
batch <- interaction(esetNoResp$study_accession,
                     esetNoResp$matrix,
                     drop = TRUE)
scaledMat <- by(data = t(exprs(esetNoResp)),
                INDICES = batch,
                FUN = function(x) scale(x)) %>%
  do.call(what = rbind) %>%
  t()
scaledMat <- scaledMat[, sampleNames(esetNoResp)]

# SLEA (Ref: Lopez-Bigas N. et al. (2008) Genome Biol.)
B <- 100
flag <- lapply(all_genesets, FUN = intersect, y = rownames(scaledMat)) %>%
  sapply(FUN = length)
zscoreMat <- sapply(all_genesets, FUN = function(gs) {
  gs <- intersect(gs, rownames(scaledMat))
  ngenes <- length(gs)
  mu <- colMeans(scaledMat[gs, , drop = FALSE], na.rm = TRUE)
  muPermut <- mclapply(1:B, FUN = function(seed) {
    set.seed(seed = seed)
    muHat <- colMeans(scaledMat[sample.int(nrow(scaledMat), ngenes), , drop = FALSE],
                      na.rm = TRUE)
    return(value = muHat)
  })
  muPermut <- do.call(what = rbind, args = muPermut)
  zscore <- (mu - colMeans(muPermut, na.rm = TRUE)) /
    apply(muPermut, MARGIN = 2, FUN = sd, na.rm = TRUE)
  return(value = zscore)
})
sleaSet <- ExpressionSet(assayData = t(zscoreMat),
                         phenoData = AnnotatedDataFrame(pData(esetNoResp)))
saveRDS(sleaSet, file = sleaFile)
inflamGS <- c("HALLMARK_INFLAMMATORY_RESPONSE",
              "HALLMARK_COMPLEMENT",
              "HALLMARK_IL6_JAK_STAT3_SIGNALING",
              "HALLMARK_TNFA_SIGNALING_VIA_NFKB")

Figure 2: Heatmap of SLEA zscore on pre-vaccination samples

Author: Slim Fourati

zscoreTemp <- exprs(sleaSet)[, sleaSet$study_time_collected <= 0]

# remove genesets with missing symbols in virtual study
zscoreTemp <- zscoreTemp[rowMeans(is.na(zscoreTemp)) < 1, ]

# for representation purpsis only present top 200 varying genesets
varLS <- apply(zscoreTemp, MARGIN = 1, FUN = var)
zscoreTemp <- zscoreTemp[order(varLS, decreasing = TRUE)[1:200], ]

# cutree into three groups (see gap statistic analysis
set.seed(seed = 23)
tree_col  <- kmeans(x = t(zscoreTemp), centers = 3)
gr <- tree_col$cluster

# relabel cluster based on inflammatory genesets
df.fig2 <- data.frame(mu = colMeans(zscoreTemp[inflamGS, ])) %>%
  rownames_to_column()

df.fig2 <- data.frame(grn = gr) %>%
  rownames_to_column() %>%
  merge(x = df.fig2) %>%
  merge(y = pData(sleaSet),
        by.x = "rowname",
        by.y = "uid")

df.fig2 %>%
  group_by(grn) %>%
  summarize(q2 = median(mu)) %>%
  ungroup() %>%
  mutate(gr = ifelse(q2 %in% min(q2),
                     yes = "low",
                     no  = NA),
         gr = ifelse(q2 %in% max(q2) & is.na(gr),
                     yes = "high",
                     no  = gr),
         gr = ifelse(is.na(gr),
                     yes = "mid",
                     no  = gr),
         gr = factor(gr, levels = c("low", "mid", "high"))) %>%
  merge(x = df.fig2,
        by = "grn") %>%
  column_to_rownames(var = "rowname") -> df.fig2

df.fig2 <- df.fig2[colnames(zscoreTemp), ]

# Remove names of unwanted cells in plot
rowLabels <- rownames(zscoreTemp)

targetCellNames <- "B.cell|E2F|T.cell|MYC| NK| Interferon|STAT|migration|Monocytes| DC|Neutrophiles"

unwantedCells <- !grepl(pattern = targetCellNames, 
                        rownames(zscoreTemp),
                        ignore.case = TRUE)

rowLabels[ unwantedCells ] <- ""

# generate heatmap annotation
ha <- df.fig2 %>%
      rownames_to_column() %>%
      mutate(inflam.gs = findInterval(mu,
                                      vec = quantile(mu,
                                                             probs = c(0, 0.33, 0.66, 1)),
                                              rightmost.closed = TRUE),
             inflam.gs = paste0("tier", inflam.gs)) %>%
      select(rowname, inflam.gs) %>%
      column_to_rownames() %>%
      .[colnames(zscoreTemp), , drop = FALSE] %>%
      HeatmapAnnotation(df = ., 
                        col = list(inflam.gs = c(tier1 = "yellow",
                                                 tier2 = "orange",
                                                 tier3 = "red")))

Heatmap(matrix                      = zscoreTemp,
        row_names_side              = "left", 
        show_column_names           = FALSE,
        column_split                = df.fig2$gr,
        show_row_dend               = FALSE,
        name                        = "SLEA z-score",
        row_labels                  = rowLabels,
        row_names_gp                = gpar(fontsize = 8),
        top_annotation              = ha)

Figure s2a: Determine optimal number of cluster using the Gap statistic

Author: Slim Fourati

clusGapFile <- file.path(dataCacheDir, "clusGap.rds")
if(!file.exists(clusGapFile)){
  set.seed(seed = 23)
  fit <- clusGap(t(zscoreTemp), kmeans, K.max = 10)
  saveRDS(fit, file = clusGapFile)
}else{
  fit <- readRDS(clusGapFile)
}

plot(fit)

print(fit, method = "firstSEmax")
## Clustering Gap statistic ["clusGap"] from call:
## clusGap(x = t(zscoreTemp), FUNcluster = kmeans, K.max = 10)
## B=100 simulated reference sets, k = 1..10; spaceH0="scaledPCA"
##  --> Number of clusters (method 'firstSEmax', SE.factor=1): 3
##           logW   E.logW       gap      SE.sim
##  [1,] 9.097460 9.751194 0.6537344 0.005032655
##  [2,] 8.987448 9.645416 0.6579687 0.003778555
##  [3,] 8.939257 9.611883 0.6726255 0.004373447
##  [4,] 8.906345 9.583541 0.6771966 0.004954919
##  [5,] 8.888788 9.565856 0.6770678 0.004154478
##  [6,] 8.870428 9.551132 0.6807041 0.003865681
##  [7,] 8.854358 9.537842 0.6834838 0.003517341
##  [8,] 8.844756 9.526233 0.6814774 0.004054859
##  [9,] 8.833201 9.514832 0.6816309 0.003835592
## [10,] 8.822944 9.505262 0.6823179 0.003576149

Assess association with clinical variable

# continuous variable
df.fig2 %>%
  select(gr,
         age_reported,
         age_imputed) %>%
  pivot_longer(cols = -gr) %>%
  group_by(name) %>%
  do(p = kruskal.test(formula = value~gr,
                      data    = .)$p.value) %>%
  ungroup() %>%
  mutate(p = unlist(p))
## # A tibble: 2 x 2
##   name             p
##   <chr>        <dbl>
## 1 age_imputed  0.630
## 2 age_reported 0.357
# categorical variable
df.fig2 %>%
   select(gr,
          study_time_collected,
          gender,
          race,
          ethnicity,
          exposure_material_reported,
          matrix,
          study_accession,
          Hispanic,
          White,
          Asian,
          Black,
          cell_type,
          cohort,
          featureSetName,
          featureSetName2,
          featureSetVendor,
          vaccine,
          vaccine_type,
          adjuvant,
          pathogen) %>%
  sapply(FUN = as.character) %>%
  as.data.frame() %>%
  pivot_longer(cols = -gr) %>%
  group_by(name) %>%
  do(p = {
    fit <- try(fisher.test(table(.$value, .$gr),
                           simulate.p.value = TRUE),
               silent = TRUE)
    if ("try-error" %in% class(fit)) {
      NA
    } else {
      fit$p.value
    }
  }) %>%
  ungroup() %>%
  mutate(p = unlist(p), adj.p = p.adjust(p, method = "BH"))
## # A tibble: 20 x 3
##    name                            p adj.p
##    <chr>                       <dbl> <dbl>
##  1 adjuvant                   0.394  0.702
##  2 Asian                      0.781  0.781
##  3 Black                      0.751  0.781
##  4 cell_type                  0.106  0.474
##  5 cohort                     0.109  0.474
##  6 ethnicity                  0.166  0.474
##  7 exposure_material_reported 0.270  0.656
##  8 featureSetName             0.157  0.474
##  9 featureSetName2            0.163  0.474
## 10 featureSetVendor           0.0500 0.474
## 11 gender                     0.329  0.659
## 12 Hispanic                   0.532  0.710
## 13 matrix                     0.295  0.656
## 14 pathogen                   0.481  0.702
## 15 race                       0.491  0.702
## 16 study_accession            0.651  0.779
## 17 study_time_collected       0.439  0.702
## 18 vaccine                    0.662  0.779
## 19 vaccine_type               0.741  0.781
## 20 White                      0.0255 0.474

Figure s2b: Stability of the signature over time

Author: Slim Fourati

df.fig2sb <- df.fig2 %>%
            filter(mu < 4.5 & mu > -4.5) %>% # remove outlier
            filter(duplicated(participant_id) |
                   duplicated(participant_id, fromLast = TRUE))

df.fig2sb %>%
  filter(study_accession %in% c("SDY80", "SDY180")) %>%
  arrange(participant_id, study_time_collected) %>%
  wilcox.test(formula = mu ~ study_time_collected,
              data    = .,
              paired  = TRUE)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  mu by study_time_collected
## V = 601, p-value = 0.002084
## alternative hypothesis: true location shift is not equal to 0
df.fig2sb %>%
  filter(study_accession %in% c("SDY80", "SDY180")) %>%                            
  select(participant_id, gr, mu, study_time_collected) %>%
  pivot_wider(names_from  = study_time_collected,
              values_from = c(gr, mu))  %>%
  mutate(stable = (`gr_-7` == `gr_0`) | abs(`mu_0`-`mu_-7`)< 1) %>%
  group_by(stable) %>%
  summarize(n = n())
## # A tibble: 2 x 2
##   stable     n
##   <lgl>  <int>
## 1 FALSE     20
## 2 TRUE      45
df.fig2sb %>%
  filter(study_accession %in% c("SDY80", "SDY180")) %>%                            
  select(participant_id, gr, mu, study_time_collected) %>%
  pivot_wider(names_from  = study_time_collected,
              values_from = c(gr, mu))  %>%
  mutate(stable = (`gr_-7` == `gr_0`) | abs(`mu_0`-`mu_-7`)< 1) %>%
  select(participant_id, stable) %>%
  merge(x = df.fig2sb, by = "participant_id", all.x = TRUE) -> df.fig2sb

ggplot(data = df.fig2sb,
        mapping = aes(x = factor(study_time_collected), y = mu)) +
        geom_point(mapping = aes(color = gr)) +
        geom_line(mapping = aes(group = participant_id,
                                linetype  = stable)) +
        scale_color_discrete(name = "pre-vax cluster") +
        labs(x = "study_time_collected",
             y = "Inflammatory genesets (SLEA z-score)") +
        theme_bw()

Figure 3 Helper Functions

getSleaPDSubset.byGS <- function(geneset){
  pd <- pData(sleaSet)
  pd$mu <- colMeans(exprs(sleaSet)[geneset, ])
  pd <- pd[ !is.na(pd$gr), ]
}

getSleaPDSubset.byGenes <- function(genes){
  df <- exprs(esetNoResp)[genes, ] %>%
          as.data.frame() %>%
          rownames_to_column(var = "gene") %>%
          pivot_longer(cols = -gene, names_to = "uid") %>%
          merge(y = pData(sleaSet), by = "uid") %>%
          filter(!is.na(gr))
}

plotSLEASubsetByTime.byGS <- function(pd, ylabel){
  ggplot(data = pd,
          mapping = aes(x = study_time_collected, y = mu, color = gr)) +
          geom_line(mapping = aes(group = participant_id),
                    alpha = 0.1) +
          geom_hline(yintercept = 0, color = "grey", size = 2) +
          geom_smooth(method = "loess", formula = "y~x", color = "black") +
          facet_wrap(facets = ~gr) +
          scale_x_continuous(limits = c(0, 28)) +
          labs(y = ylabel,
               x = "Time after vaccination (days)") +
          theme_bw() +
          theme(axis.text    = element_text(size = 15),
                strip.text   = element_text(size = 20),
                axis.title.x = element_text(size = 20),
                legend.pos   = "none",
                panel.grid  = element_blank())
}

plotSLEASubsetByTime.byGene <- function(df){
  q2DF <- df %>%
          group_by(gene) %>%
          summarize(q2 = median(value))
  
  ggplot(data = df,
          mapping = aes(x = study_time_collected, y = value, color = gr)) +
          geom_line(mapping = aes(group = participant_id),
                    alpha = 0.1) +
          geom_hline(data    = q2DF,
                     mapping = aes(yintercept = q2),
                     color   = "grey") +
          geom_smooth(method = "loess", formula = "y~x", color = "black") +
          facet_grid(facets = gene~gr) +
          scale_x_continuous(limits = c(0, 28)) +
          scale_y_continuous(limits = c(7, 10.5)) +
          labs(y = "log2 intensity (ComBat)") +
          theme_bw() +
          theme(axis.text    = element_text(size = 15),
                strip.text   = element_text(size = 20),
                axis.title.x = element_text(size = 20),
                legend.pos   = "none",
                panel.grid  = element_blank())
}

Figure 3a: Plot SLEA z-score of inflammatory genesets over time

Author: Slim Fourati

pd.fig3a <- getSleaPDSubset(inflamGS)
plotSLEASubsetByTime(pd.fig3a, "Inflammatory pathways (SLEA z-score)")

Figure s3a: Plot canonical inflammatory genes over time

Author: Slim Fourati

inflamGenes <- c("IL1B", "NLRP3", "TNFAIP2")
df.figs3a <- getSleaPDSubset.byGenes(inflamGenes)
plotSLEASubsetByTime.byGene(df.figs3a)

Figure s3b: Plot SLEA z-score of interferon genesets over time

Author: Slim Fourati

interferonGS  <- c("HALLMARK_INTERFERON_ALPHA_RESPONSE",
                   "HALLMARK_INTERFERON_GAMMA_RESPONSE")
pd.figs3b <- getSleaPDSubset(interferonGS)
plotSLEASubsetByTime(pd.figs3b, "Interferon-stimulated genes (SLEA z-score)")

Figure s3b (cont): Plot canonical interferon-regulated genes over time

Author: Slim Fourati

isgGenes <- c("STAT2", "IRF9", "MX1")
df.figs3b2 <- getSleaPDSubset.byGenes(isgGenes)
plotSLEASubsetByTime.byGene(df.figs3b2)

Figure 3c: Plot SLEA z-score of B cell genesets over time

Author: Slim Fourati

bcellGS <- c("enriched in B cells (I) (M47.0)",
             "enriched in B cells (V) (M47.4)",
             "plasma cells & B cells, immunoglobulins (M156.0)")
pd.fig3c <- getSleaPDSubset(bcellGS)
plotSLEASubsetByTime(pd.fig3c, "B cells (SLEA z-score)")

Figure s3c: Plot canonical B cell markers over time

Author: Slim Fourati

bcellGenes <- c("CD79A", "CD79B", "BANK1")
df.figs3c <- getSleaPDSubset.byGenes(bcellGenes)
plotSLEASubsetByTime.byGene(df.figs3c)

Figure 4a: Plot MFC based on inflamDF

Author: Slim Fourati

pd.fig4a <- pData(sleaSet)[sleaSet$study_time_collected <= 0, ]

df.fig4a <- pd.fig4a %>%
              merge(y = distinct(select(pData(esetWithResp), 
                                        participant_id, MFC, MFC_p30))) %>%
                group_by(study_accession) %>%
                mutate(sMFC = scale(MFC),
                       vaccine = paste0(pathogen,".", vaccine_type),
                       vaccine = ifelse(pathogen %in% "Meningococcus",
                                        yes = "Meningococcus",
                                        no  = vaccine))

ggplot(data = df.fig4a,
       mapping = aes(x = factor(gr), y = sMFC)) +
       geom_beeswarm(cex = 1.1, size = 0.75) +
       geom_boxplot(outlier.color = "transparent", 
                    fill = "transparent", 
                    col = "red") +
       labs(x = "Prevax inflammatory cluster") +
       theme_bw()

kruskal.test(x = df.fig4a$sMFC, g = df.fig4a$gr)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df.fig4a$sMFC and df.fig4a$gr
## Kruskal-Wallis chi-squared = 7.0348, df = 2, p-value = 0.02968
pairwise.wilcox.test(x = df.fig4a$sMFC, 
                     g = df.fig4a$gr, 
                     p.adjust.method = "none") %>%
    print()
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  df.fig4a$sMFC and df.fig4a$gr 
## 
##      low    mid   
## mid  0.1630 -     
## high 0.0095 0.1676
## 
## P value adjustment method: none

Table s4: Difference in MFC between low and high inflam. group per vaccine

Author: Slim Fourati

df.fig4a %>%
  filter(age_imputed < 50 & gr %in% c("low", "high")) %>%
  group_by(vaccine) %>%
  do(n = nrow(.),
    estimate = {
        fit <- try(wilcox.test(formula = sMFC~gr, 
                               data = ., 
                               conf.int = TRUE),
                   silent = TRUE)
        if ("try-error" %in% class(fit)) {
          NA
        } else {
          fit$estimate
        }
    },
    p = {
        fit <- try(wilcox.test(formula = sMFC~gr, data = ., conf.int = TRUE),
                 silent = TRUE)
      if ("try-error" %in% class(fit)) {
        NA
      } else {
        fit$p.value
      }
    }) %>%
    mutate(n = unlist(n),
           estimate = unlist(estimate), 
           p = unlist(p)) %>%
    print()
## Source: local data frame [9 x 4]
## Groups: <by row>
## 
## # A tibble: 9 x 4
##   vaccine                                   n   estimate       p
##   <chr>                                 <int>      <dbl>   <dbl>
## 1 Hepatitis B.Inactivated                  15 -0.0000499 0.731  
## 2 Influenza.Inactivated                   359 -0.333     0.00589
## 3 Influenza.Live attenuated                21 -0.0000207 0.373  
## 4 Meningococcus                            22 -0.0579    0.794  
## 5 Pneumococcus.Polysaccharide               6  0.695     0.0902 
## 6 Smallpox.Live attenuated                  2  0.784     1      
## 7 Tuberculosis.Recombinant Viral Vector     8  0.121     1      
## 8 Varicella Zoster.Live attenuated         13 -0.341     0.524  
## 9 Yellow Fever.Live attenuated             62 -0.0000579 0.854

Figure 4-split: Split by Influenza.Inactivated or others

Author: Slim Fourati

plotTemp <- filter(df.fig4a, !(vaccine %in% c("Pneumococcus.Polysaccharide",
                                              "Smallpox.Live attenuated",
                                              "Tuberculosis.Recombinant Viral Vector")))

ggplot(data = filter(plotTemp, age_imputed < 50),
        mapping = aes(x = factor(gr), y = sMFC)) +
        geom_beeswarm() +
        geom_boxplot(outlier.color = "transparent", 
                     fill = "transparent", 
                     col = "red") +
        labs(x = "Prevax inflammatory cluster", 
             title = "age < 50") +
        facet_wrap(facets = ~ifelse(test= vaccine %in% "Influenza.Inactivated",
                                    yes = "Influenza.Inactivated",
                                    no = "others")) +
        theme_bw()

filter(plotTemp, vaccine %in% "Influenza.Inactivated") %>%
  (function(p) { pairwise.wilcox.test(x = p$sMFC,
                                      g = p$gr,
                                      p.adjust.method = "none")}) %>%
  print()
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  p$sMFC and p$gr 
## 
##      low    mid   
## mid  0.3138 -     
## high 0.0059 0.0771
## 
## P value adjustment method: none
filter(plotTemp, vaccine != "Influenza.Inactivated") %>%
  (function(p) { pairwise.wilcox.test(x = p$sMFC,
                                      g = p$gr,
                                      p.adjust.method = "none")}) %>%
  print()
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  p$sMFC and p$gr 
## 
##      low  mid 
## mid  0.14 -   
## high 0.16 0.97
## 
## P value adjustment method: none